Soil data

Downloading and wragling soil data

Author

Kaique Alves

Published

March 4, 2025

Packages

library(tidyverse)
library(cowplot)
library(patchwork)
library(daymetr)
library(raster)
library(spatstat)
library(KrigR)
library(lubridate)
library(sf)
library(viridis)
library(terra)
library(XPolaris)
library(MetBrewer)
Warning

About XPolaris

XPolaris has been removed from CRAN. To install the GitHub available version:

if (!require(XPolaris)) {
  if (!require(devtools)) {
    install.packages("devtools")
  }
  devtools::install_github("lhmrosso/XPolaris")
}

White mold data

Loading white mold data. We are going to use the field coordinates (latitude and longitude) to extract the data from the rasters of soil variables.

About the Data

If you have not downloaded the data necessary to run these analysis, please refer to getting started section before runnig the code below

wm_data = read.csv("data_white-mold/WhiteMoldSurveyWrangledData.csv")

Removing missing coordinates

There are some missing coordinates in the dataset. Here we remove them.

wm_data2 = wm_data %>% 
  filter(!is.na(latitude))

Soil variables

Locations

Setting up the coordinated of each quadrat we need to download the soil data

# exkansas
ny_locations = data.frame(ID = c("NY1","NY2","NY3","NY4","NY5","NY6","NY7","NY8"),
                          lat  = c(42,  42,  42, 42, 43,   43,   43,   43),
                          long = c(-77, -78,-79, -80,-77, -78.0,-79.0, -80))

xplot(locations = ny_locations)+
  geom_point(data = wm_data2 , size = 0.3,
             aes(longitude,latitude))+
  coord_map( xlim = c(-81,-75),
             ylim = c(41.5,44.5))
Coordinate system already present. Adding new coordinate system, which will
replace the existing one.

# max(wm_data2$longitude)

Download soil data

ny_images <- ximages(locations = ny_locations,
                      statistics = c('mean'),
                      variables = c('ph','om','clay',"sand","silt","bd", "hb","n","alpha","ksat","lambda","theta_r","theta_s"),
                      layersdepths = c('0_5'),
                      localPath = file.path("soil_images"))

Here we read the metadata of the downloaded data. It containg the information regarding the directory of each downloaded file.

Merge images

Organic matter

We are going to filter only the lines contain information regarding Organic matter

om_df_images = ny_images %>% 
  filter(variables == "om")

As you can notice in the next figure, the raster objects are downloaded in separate raster of 01 degree size

# read all files for organic matter
om_stack = lapply(om_df_images$local_file, brick)


par(mfrow = c(2,4))

for(i in 1:length(om_stack)){
plot(om_stack[[i]])
}

Therefore, we use the function merge() to create a single raster object covering the whole study region

om_ny_raster =merge(om_stack[[1]],
                    om_stack[[2]],
                    om_stack[[3]],
                    om_stack[[4]],
                    om_stack[[5]],
                    om_stack[[6]],
                    om_stack[[7]],
                    om_stack[[8]])

plot(exp(om_ny_raster))

Automatization

Instead of doing the above step for each variable by hand, here we created a function to do all the steps automatically.

get_soil_var = function(var, data){
  data_filtered = data %>% 
  filter(variables == var)

stack_list = lapply(data_filtered$local_file, brick)
  
  
  merged_raster = merge(stack_list[[1]],
                    stack_list[[2]],
                    stack_list[[3]],
                    stack_list[[4]],
                    stack_list[[5]],
                    stack_list[[6]],
                    stack_list[[7]],
                    stack_list[[8]]
                    )
  return(merged_raster)
}

soil_vars = c('ph','om','clay',"sand","silt","bd", "hb","n","alpha","ksat","lambda","theta_r","theta_s")

selected_vars = c('ph','om','clay',"sand","silt","bd","theta_r","theta_s")
soil_variables_list = lapply(soil_vars,get_soil_var, data = ny_images)
names(soil_variables_list) = soil_vars

saveRDS(soil_variables_list, "soil_images/list_soil_variables_raster.rds")
aggre_var_list = lapply(soil_variables_list, aggregate,fact=30)

saveRDS(aggre_var_list, "soil_images/list_soil_variables_raster_aggregated.rds")

Plot soil maps

NY shape file

Loading New York state shape file.

ny_shape1 = read_sf("shape_files/cugir-007865/cugir-007865/cty036.shp")

Cropping raster files

We use the function lappy() to crop all variables’ rasters using the NY shape file as a mask.

aggre_var_list2 = lapply(aggre_var_list, mask, ny_shape1)

Then here we create a function for ploting the soil maps.

actual_var_names = c("Soil pH in water", "Soil organic matter","Clay","Sand","Silt","Bulk density","Residual soil water content","Saturated soil water content")
actual_var_symbol = c("pH", "OM","Clay","Sand","Silt","BD","\u03B8r","\u03B8s")
actual_var_units = c("", "(%)","(%)","(%)","(%)","(g/cm³)","(m³/m³)","(m³/m³)")

plot_gg_raster =  function(X,raster, var){
  # actual_var_names[X]

if(var[X] == "om"){xx=1}else{xx = 0}
  
as.data.frame(raster[[var[X]]], xy = T) %>%
    filter(layer !="NaN", x< -76.8) %>%
  mutate(layer = case_when(xx ==1~ exp(layer),
                           xx ==0~ layer)) %>% 
    ggplot(aes())+
    geom_raster(aes(x, y, fill = layer))+
    scale_fill_viridis(option ="B",guide = guide_colorbar(barwidth = 0.2, barheight =5 ))+
    geom_sf(data = ny_shape1,
                 # aes(x=long, y = lat, group = group),
                 fill= NA,
                 linewidth =0.2,
                 alpha = 0.5,
                 color = "white")+
  # geom_point(data = wm_data2 , size = 0.1,color = "white",
  #            aes(longitude,latitude))+
    # coord_quickmap(xlim = c(-80,-76.8), ylim = c(42,43.35))+
    coord_sf(xlim = c(-80,-76.8), ylim = c(42,43.35))+
    theme_map()+
    labs(title =paste("    ",actual_var_names[X]),
         fill = paste(actual_var_symbol[X],actual_var_units[X]))
}

# selected_vars
# aggre_var_list[[1]]
# plot_gg_raster(1,aggre_var_list2, var = selected_vars[1] )
as.data.frame(aggre_var_list2$ph, xy = T) %>%
    filter(layer !="NaN", x< -76.8) %>%
  # mutate(layer = case_when(xx ==1~ exp(layer),
  #                          xx ==0~ layer)) %>% 
    ggplot(aes())+
    geom_raster(aes(x, y, fill = layer))+
    scale_fill_viridis(option ="B",guide = guide_colorbar(barwidth = 0.2, barheight =5 ))+
    geom_sf(data = ny_shape1,
                 # aes(x=long, y = lat, group = group),
                 fill= NA,
                 linewidth =0.2,
                 alpha = 0.5,
                 color = "white")

Combo soil maps

Here we plot all maps into a single combo fiure

do.call(patchwork::wrap_plots, lapply(X =1:length(selected_vars) , FUN =plot_gg_raster, raster = aggre_var_list2, var = selected_vars))+
  plot_layout(ncol = 2)+
  plot_annotation(tag_levels = "A")&
  theme(legend.position = "right",
        legend.text = element_text(size = 5),
        legend.title = element_text(size = 5),
        plot.title =  element_text(size = 7, face = "bold"))

ggsave("figs/maps/soil_maps.png", dpi = 900, height = 7, width = 7, bg = "white")

Extract variables to location

wm_data2_uni = wm_data2 %>% 
  group_by(subject) %>% 
  slice(1L)

Selecting coordinate columns from the white mold data set

coords<-data.frame(lon=wm_data2_uni$longitude, lat=wm_data2_uni$latitude)
coordinates(coords)<-c("lon","lat")

Extracting variable from the original merged raster (30 meters resolution)

df1 = lapply(soil_variables_list, extract, coords@coords)
as.data.frame(df1) %>% 
  mutate(subject =wm_data2_uni$subject) %>% 
  cbind(longitude=wm_data2_uni$longitude, 
        latitude=wm_data2_uni$latitude) %>% 
  write.csv("soil_images/extracted_soil_data.csv",row.names = F)

Session Info

sessionInfo()
R version 4.4.1 (2024-06-14 ucrt)
Platform: x86_64-w64-mingw32/x64
Running under: Windows 11 x64 (build 26100)

Matrix products: default


locale:
[1] LC_COLLATE=Portuguese_Brazil.utf8  LC_CTYPE=Portuguese_Brazil.utf8   
[3] LC_MONETARY=Portuguese_Brazil.utf8 LC_NUMERIC=C                      
[5] LC_TIME=Portuguese_Brazil.utf8    

time zone: America/Sao_Paulo
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices datasets  utils     methods   base     

other attached packages:
 [1] knitr_1.48             MetBrewer_0.2.0        XPolaris_1.0.2        
 [4] terra_1.8-5            viridis_0.6.5          viridisLite_0.4.2     
 [7] sf_1.0-19              KrigR_0.9.4            spatstat_3.3-0        
[10] spatstat.linnet_3.2-3  spatstat.model_3.3-3   rpart_4.1.23          
[13] spatstat.explore_3.3-3 nlme_3.1-164           spatstat.random_3.3-2 
[16] spatstat.geom_3.3-4    spatstat.univar_3.1-1  spatstat.data_3.1-4   
[19] raster_3.6-30          sp_2.1-4               daymetr_1.7.1         
[22] patchwork_1.3.0        cowplot_1.1.3          lubridate_1.9.3       
[25] forcats_1.0.0          stringr_1.5.1          dplyr_1.1.4           
[28] purrr_1.0.2            readr_2.1.5            tidyr_1.3.1           
[31] tibble_3.2.1           ggplot2_3.5.1          tidyverse_2.0.0       

loaded via a namespace (and not attached):
 [1] DBI_1.2.3             pbapply_1.7-2         deldir_2.0-4         
 [4] gridExtra_2.3         rlang_1.1.4           magrittr_2.0.3       
 [7] e1071_1.7-16          compiler_4.4.1        mgcv_1.9-1           
[10] systemfonts_1.1.0     maps_3.4.2.1          vctrs_0.6.5          
[13] ecmwfr_2.0.3          crayon_1.5.3          pkgconfig_2.0.3      
[16] fastmap_1.2.0         labeling_0.4.3        utf8_1.2.4           
[19] rmarkdown_2.28        tzdb_0.4.0            ragg_1.3.3           
[22] automap_1.1-12        xfun_0.48             cachem_1.1.0         
[25] jsonlite_1.8.9        progress_1.2.3        goftest_1.2-3        
[28] reshape_0.8.9         spatstat.utils_3.1-1  prettyunits_1.2.0    
[31] parallel_4.4.1        R6_2.5.1              stringi_1.8.4        
[34] stars_0.6-7           Rcpp_1.0.13           iterators_1.0.14     
[37] tensor_1.5            zoo_1.8-12            snow_0.4-4           
[40] FNN_1.1.4.1           Matrix_1.7-0          splines_4.4.1        
[43] timechange_0.3.0      tidyselect_1.2.1      rstudioapi_0.17.0    
[46] abind_1.4-8           yaml_2.3.10           codetools_0.2-20     
[49] curl_6.0.1            lattice_0.22-6        intervals_0.15.5     
[52] plyr_1.8.9            withr_3.0.2           evaluate_1.0.1       
[55] units_0.8-5           proxy_0.4-27          polyclip_1.10-7      
[58] xts_0.14.1            pillar_1.9.0          KernSmooth_2.23-24   
[61] renv_1.1.2            foreach_1.5.2         ncdf4_1.23           
[64] generics_0.1.3        spacetime_1.3-2       hms_1.1.3            
[67] munsell_0.5.1         scales_1.3.0          class_7.3-22         
[70] glue_1.8.0            mapproj_1.2.11        tools_4.4.1          
[73] grid_4.4.1            colorspace_2.1-1      cli_3.6.3            
[76] spatstat.sparse_3.1-0 gstat_2.1-2           textshaping_0.4.0    
[79] fansi_1.0.6           doSNOW_1.0.20         gtable_0.3.5         
[82] digest_0.6.37         classInt_0.4-10       htmlwidgets_1.6.4    
[85] farver_2.1.2          memoise_2.0.1         htmltools_0.5.8.1    
[88] lifecycle_1.0.4       httr_1.4.7